import pandas as pd
import numpy as np
def prepare_data_to_model(path='../data/survival.csv'):
    """Load the survival CSV and split it into features, treatment and outcome.

    The file is read with its first column as the index, and the ``id``,
    ``study`` and ``etype`` columns are discarded. The ``rx`` column is
    replaced by an integer ``treatment`` column: ``'Obs'`` becomes 0,
    ``'Lev'`` becomes 1 and every other value becomes 2. Remaining missing
    values are filled with -1.

    Args:
        path: Location of the CSV file to read.

    Returns:
        A tuple ``(features, treatment, time)`` where ``features`` is the
        frame without the ``treatment`` and ``time`` columns, and the other
        two items are those columns as Series.
    """
    frame = pd.read_csv(path, index_col=0)
    frame = frame.drop(['id', 'study', 'etype'], axis=1)

    is_obs = frame['rx'] == 'Obs'
    is_lev = frame['rx'] == 'Lev'
    # Anything that is neither 'Obs' nor 'Lev' falls through to code 2.
    frame['treatment'] = np.where(is_obs, 0, np.where(is_lev, 1, 2))

    # The raw label column is no longer needed once it has been encoded.
    frame = frame.drop(columns='rx')
    frame = frame.fillna(-1)

    features = frame.drop(columns=['treatment', 'time'])
    return features, frame['treatment'], frame['time']
Zamieniam zmienną 'rx' na zmienną kategoryczną: 'Obs' = 0, 'Lev' = 1, 'Lev+FU' = 2
Podział zbioru na treatment, target i feature
# Split the prepared data set into features, treatment indicator and outcome.
X, treatment, y = prepare_data_to_model()

# Keep only the first occurrence of every distinct feature row. The index
# labels are deliberately left as they are (no reset_index).
X_nodup = X.loc[~X.duplicated(keep='first')]
X_nodup
Load model
import pickle

# NOTE: unpickling can execute arbitrary code; only load pickle files that
# come from a trusted source.
with open('pickles/opt_XGBTRegressor.pickle', 'rb') as model_file:
    xg = pickle.load(model_file)

# The result is unpacked into te / lb / ub; presumably the point estimates and
# their lower / upper bounds per treatment arm (confirm against the model API).
ate_result = xg.estimate_ate(X, treatment, y)
te, lb, ub = ate_result

report_t1 = 'Average Treatment 1 Effect (XGBoost): {:.2f} ({:.2f}, {:.2f})'
report_t2 = 'Average Treatment 2 Effect (XGBoost): {:.2f} ({:.2f}, {:.2f})'
print(report_t1.format(te[0], lb[0], ub[0]))
print(report_t2.format(te[1], lb[1], ub[1]))
from dalex import Explainer


def func_pred(xg, x):
    """Return the first column of the model's predictions for ``x``."""
    return xg.predict(x)[:, 0]


# Wrap the loaded model so dalex can explain it as a regression model, using
# func_pred to obtain one prediction value per row.
exp = Explainer(
    model=xg,
    data=X,
    y=y,
    model_type='regression',
    predict_function=func_pred,
)

# Produce and plot a break-down explanation for every distinct feature row,
# printing the row's index label first.
for i, row in X_nodup.iterrows():
    print(i)
    break_down = exp.predict_parts(row, type='break_down')
    break_down.plot(max_vars=13)